/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package maindatart.rush.alg;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import maindatart.rush.alg.*;
import maindatart.rush.result.RushResultService;
import org.wltea.analyzer.IKSegmentation;
import org.wltea.analyzer.Lexeme;


/**
 * Compares the similarity of two texts using word segmentation followed by
 * ordered token matching.
 *
 * <p>Algorithm:
 * <ol>
 *   <li>Both texts are split into tokens with the IK analyzer
 *       ({@link IKSegmentation}).</li>
 *   <li>The text with fewer characters is taken as the reference (on equal
 *       length the first argument is the reference).</li>
 *   <li>The reference tokens are taken in order; each one is searched for
 *       (case-insensitively) in the other text's tokens, starting just after
 *       the position of the previous successful match.</li>
 *   <li>The result is the number of matched reference tokens divided by the
 *       total number of reference tokens.</li>
 * </ol>
 *
 * @author zhangxu
 * @author Administrator
 */
public class TextFenciSameAlt implements DataSameAlgorithm {

    private static final Logger LOGGER = Logger.getLogger(TextFenciSameAlt.class.getName());

    /** Single shared instance; created eagerly because the class holds no instance state. */
    private static final TextFenciSameAlt INSTANCE = new TextFenciSameAlt();

    private TextFenciSameAlt() {
    }

    /**
     * Returns the shared instance of this algorithm.
     *
     * @return the singleton instance, never {@code null}
     */
    public static TextFenciSameAlt getInstance() {
        return INSTANCE;
    }

    /**
     * Computes the similarity of two texts.
     *
     * @param a first text; expected to be a {@link String}
     * @param b second text; expected to be a {@link String}
     * @return the fraction of the shorter text's tokens that were found, in
     *         order, in the longer text; {@code 0} if either argument is
     *         {@code null} or not a {@code String}, if the shorter text yields
     *         no tokens, or if a runtime failure occurs while comparing
     */
    @Override
    public double itemSame(Object a, Object b) {
        // instanceof is false for null, so this also covers the null case.
        if (!(a instanceof String) || !(b instanceof String)) {
            return 0;
        }
        String first = (String) a;
        String second = (String) b;

        try {
            List<String> longTokens;
            List<String> shortTokens;
            if (first.length() > second.length()) {
                longTokens = tokenize(first);
                shortTokens = tokenize(second);
            } else {
                longTokens = tokenize(second);
                shortTokens = tokenize(first);
            }

            // Without this guard the ratio below would be 0/0 = NaN.
            if (shortTokens.isEmpty()) {
                return 0;
            }

            int matched = countOrderedMatches(shortTokens, longTokens);
            return (double) matched / (double) shortTokens.size();
        } catch (RuntimeException ex) {
            // Best effort: a failure while segmenting/comparing is reported as "no similarity".
            LOGGER.log(Level.SEVERE, "Failed to compute text similarity", ex);
        }
        return 0;
    }

    /**
     * Counts how many tokens of {@code shortTokens} can be found, in order, in
     * {@code longTokens} (case-insensitive comparison).
     *
     * @param shortTokens reference tokens, processed front to back
     * @param longTokens  tokens that are searched
     * @return number of reference tokens that were matched
     */
    private static int countOrderedMatches(List<String> shortTokens, List<String> longTokens) {
        int matched = 0;
        int cursor = 0; // index in longTokens where the next search starts
        for (String token : shortTokens) {
            for (int i = cursor; i < longTokens.size(); i++) {
                if (longTokens.get(i).equalsIgnoreCase(token)) {
                    matched++;
                    // Continue AFTER the matched token so that one token of the
                    // longer text cannot be matched more than once.
                    cursor = i + 1;
                    break;
                }
            }
        }
        return matched;
    }

    /**
     * Splits a text into tokens using the IK analyzer.
     *
     * <p>If reading fails with an {@link IOException}, the error is logged and
     * the tokens collected up to that point are returned.
     *
     * @param text the text to segment
     * @return the token texts in the order produced by the segmenter
     */
    private List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<String>();
        // When the flag is true the segmenter performs maximum-word-length segmentation.
        IKSegmentation segmenter = new IKSegmentation(new StringReader(text), false);
        try {
            Lexeme lexeme;
            while ((lexeme = segmenter.next()) != null) {
                tokens.add(lexeme.getLexemeText());
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return tokens;
    }

    /**
     * Returns the description of this algorithm.
     *
     * @return the description text
     */
    @Override
    public String getDescrip() {
        // NOTE(review): the text reads "exact character matching algorithm", which
        // does not describe the segmentation-based matching done here - confirm
        // with whoever displays this string before changing it.
        return "字符完全匹配算法";
    }

    /**
     * Small manual demo: segments a sample string and prints every token.
     *
     * @param args unused
     * @throws IOException if the segmenter fails to read the sample text
     */
    public static void main(String[] args) throws IOException {
        String str = "用友软件股份有限公司";
        StringReader reader = new StringReader(str);
        // When the flag is true the segmenter performs maximum-word-length segmentation.
        IKSegmentation ik = new IKSegmentation(reader, true);
        Lexeme lexeme;
        while ((lexeme = ik.next()) != null) {
            System.out.println(lexeme.getLexemeText());
        }
    }

}
